from operator import itemgetter

print("Updated by Wei 20130318. PLEASE do NOT use this program.")
print("The purpose of this program is to get the data for the X axis.")
print("program begins...")
# Build a "frequency of frequencies" histogram from the query term pair file:
# for every distinct freq value found in the input, count how many input
# lines carry that value, then print the (freq, count) pairs in ascending
# order of freq.
#
# Expected input line layout (fields separated by exactly one space):
#     <uniqueID> <term_pair> <freq>
# Only the third field is used. A line with fewer than three fields raises
# IndexError, and a non-integer third field raises ValueError.

# Maps freq -> number of input lines having that freq.
freqOfFreqDict = {}

inputFileName = "/data3/obukai/the_new_trip_of_feature_generation/gov2ClearYourMindAndDoItAgain/queryTermPairProbabilityOfTheQueryTrace_sorted_by_query_term_pair"
# Stream the file line by line instead of loading it all with readlines(),
# and let the context manager close it even if a malformed line raises.
with open(inputFileName, "r") as inputFileHandler:
    for line in inputFileHandler:
        lineElements = line.strip().split(" ")
        # Fields 0 and 1 (uniqueID, term_pair) are not needed for the
        # histogram; indexing field 2 still rejects lines that are too short.
        freq = int(lineElements[2])
        freqOfFreqDict[freq] = freqOfFreqDict.get(freq, 0) + 1

# (freq, count) pairs ordered by freq. sorted() with key= works on both
# Python 2 and Python 3, unlike list.sort(cmp=...).
infoFromDictList = sorted(freqOfFreqDict.items(), key=itemgetter(0))

# Despite its name, totalFreq is the sum of the histogram counts, i.e. the
# number of input lines that were read.
totalFreq = 0
for freqAndCount in infoFromDictList:
    print(freqAndCount)
    totalFreq += freqAndCount[1]

# Single pre-formatted argument keeps the output identical on Python 2 and 3.
print("totalFreq: %s" % totalFreq)

print("program ends.")